library(readxl)

canton_total_pop <- read_rds("~/Desktop/canton_total_pop_prov.rds")

# Completeness ------

peralta_completeness <- 
  read_xlsx("~/Downloads/12963_2019_183_MOESM2_ESM.xlsx", sheet=5, skip=1) %>% 
  rename(province = `...1`) %>% 
  mutate(province = str_pad(province, 2, side="left", pad="0"))

# PROVINCE ANALYSIS ---------

full_df_prov <- 
  full_df %>% 
  left_join(
    canton_total_pop
  ) %>% 
  mutate(
   cf = cf*total_population,
   percent_rural_corrected_ratio = percent_rural_corrected_ratio*total_population,
   electricidad_no = electricidad_no*total_population,
   materials_index = materials_index*total_population,
   elimina_servidas_red_pozo_letrina = elimina_servidas_red_pozo_letrina*total_population,
   Literate_women = Literate_women*total_population,
   InSchool_girls = InSchool_girls*total_population,
   Idioma_indigena_self = Idioma_indigena_self*total_population,
   pcv3_use = pcv3_use*total_population,
   vaccine_index = vaccine_index*total_population,
   ANC_use = ANC_use*total_population,
   ANC_visits_median_use  = ANC_visits_median_use*total_population
  ) %>% 
  group_by(province, period) %>% 
  summarize(
    ALRI5_5_plus = sum(ALRI5_5_plus, na.rm=T),
    under5population = sum(under5population, na.rm=T),
    total_population = sum(total_population, na.rm = T),
    cf = sum(cf, na.rm = T),
    percent_rural_corrected_ratio = sum(percent_rural_corrected_ratio, na.rm = T),
    electricidad_no = sum(electricidad_no, na.rm = T),
    materials_index = sum(materials_index, na.rm = T),
    elimina_servidas_red_pozo_letrina = sum(elimina_servidas_red_pozo_letrina, na.rm = T),
    Literate_women = sum(Literate_women, na.rm = T),
    InSchool_girls = sum(InSchool_girls, na.rm = T),
    Idioma_indigena_self = sum(Idioma_indigena_self, na.rm = T),
    pcv3_use = sum(pcv3_use, na.rm = T),
    vaccine_index = sum(vaccine_index, na.rm = T),
    ANC_use = sum(ANC_use, na.rm = T),
    ANC_visits_median_use  = sum(ANC_visits_median_use, na.rm = T),
  ) %>% 
  mutate(
    cf = cf / total_population,
    percent_rural_corrected_ratio = percent_rural_corrected_ratio / total_population,
    electricidad_no = electricidad_no / total_population,
    materials_index = materials_index / total_population,
    elimina_servidas_red_pozo_letrina = elimina_servidas_red_pozo_letrina / total_population,
    Literate_women = Literate_women / total_population,
    InSchool_girls = InSchool_girls / total_population,
    Idioma_indigena_self = Idioma_indigena_self / total_population,
    pcv3_use = pcv3_use / total_population,
    vaccine_index = vaccine_index / total_population,
    ANC_use = ANC_use / total_population,
    ANC_visits_median_use  = ANC_visits_median_use / total_population,
  ) %>% 
  ungroup() %>% 
  left_join(peralta_completeness %>% 
              mutate(Completeness = as.numeric(Completeness)/100,
                     one_sd_5q0 = as.numeric(`1 SD 5q0`),
                     one_1sd_5q0 = as.numeric(`1-SD 5q0`)
              ))


fixest::feols(Completeness ~ cf | province + period, full_df_prov)
fixest::feols(one_sd_5q0 ~ cf | province + period, full_df_prov)

ggplot(full_df_prov, 
       aes(x=cf, y=Completeness, shape=period, color=province, group=period))  + 
  geom_point() + 
  # geom_smooth(method="lm") + 
  scale_x_continuous(labels=scales::percent_format()) + 
  scale_y_continuous(labels=scales::percent_format()) + 
  facet_grid(.~period)


ggplot(full_df_prov %>% 
         mutate(period = ifelse(period=="1990", "1988-1992", 
                                ifelse(period=="2001", "1999-2003", 
                                       ifelse(period=="2010", "2008-2012", period)))), 
       aes(x=cf, y=Completeness, shape=period, color=province, group=period)) + 
  geom_point() + 
  scale_x_continuous(labels=scales::percent_format()) + 
  scale_y_continuous(labels=scales::percent_format()) + 
  # geom_smooth(method="lm") + 
  ylab("Peralta et al. Completeness") + 
  xlab("Province-level primary clean cooking fuel use") + 
  facet_grid(.~period)

# Models -----

# 6.1 k=3 -------

# Empty model -------
prov_lm_empty_mod <- fixest::feglm(ALRI5_5_plus ~
                            offset(log(under5population)) + 
                            
                            cf10 |
                            
                            as.factor(province) +
                            as.factor(period)
                          ,
                          data=full_df_prov %>% 
                            mutate(cf10 = cf*10),
                          family = quasipoisson())


prov_gam_empty_mod <- gam(ALRI5_5_plus ~
                             offset(log(under5population)) + 
                             
                             s(cf,k=4, fx=T) +
                             
                             as.factor(province) +
                             as.factor(period)
                           ,
                           data=full_df_prov,
                           family = quasipoisson())

plot(prov_gam_empty_mod)

# Model 1 -------

prov_lm_1_mod <- fixest::feglm(ALRI5_5_plus ~
                        offset(log(under5population)) + 
                        
                       cf10 +
                        
                        percent_rural_corrected_ratio + # not w/ outcome
                        electricidad_no + # associated
                        
                        # material_techo_nicer_all + # associated
                        # material_techo_hormigon_losa_cemento_use + # not w/ cf
                        # material_pared_hormigon_bloque_ladrillo + # close enough
                        # material_piso_nicer + # associated
                        # material_piso_tierra + # associated
                        materials_index + # associated
                        
                        # obtiene_agua_redpublica_tuberia_adentro_use + # associated
                        # servicio_higenico_redpublica_use + # associated
                        # elimina_servidas_red_pozo + # associated
                        elimina_servidas_red_pozo_letrina + # associated
                        # servicio_ducha_excluso + # associated
                        # elimina_basura_service + # not w/ outcome
                        # hygiene_index + # associated
                        
                        Literate_women + # associated
                        InSchool_girls + # associated
                        Idioma_indigena_self + # associated
                        
                        # BCG_use + # no
                        # DPT3_use + # not w/ cf
                        # SAR_use + # no
                        # Polio_use + # not w/ cf
                        pcv3_use + # not really
                        vaccine_index + # not w/ cf
                        
                        ANC_use + # pretty much 
                        ANC_visits_median_use |
                        
                        as.factor(province) +
                        as.factor(period)
                      ,
                      data=full_df_prov %>% 
                        mutate(cf10 = cf*10),
                      family = quasipoisson())



prov_gam_1_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4, fx=T) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(province) +
                         as.factor(period)
                       ,
                       data=full_df_prov,
                       family = quasipoisson())

plot(prov_gam_1_mod)


# Combine data from linear ------


alri5_glm_prov_estimates <- tidy(prov_lm_empty_mod)[1,] %>% 
  mutate(model = "Empty model") %>% 
  bind_rows(tidy(prov_lm_1_mod)[1,] %>% 
              mutate(model = "Preferred specification")) %>% 
  mutate(
    irr = exp(estimate),
    int66_hi = exp(estimate + (0.412*std.error)),
    int66_low = exp(estimate - (0.412*std.error)),
    int80_hi = exp(estimate + (0.842*std.error)),
    int80_low = exp(estimate - (0.842*std.error)),
    int95_hi = exp(estimate + (1.95996*std.error)),
    int95_low = exp(estimate - (1.95996*std.error))) 

prov_glm <- ggplot() +
  geom_interval(data=alri5_glm_prov_estimates, 
                aes(x=model, y=irr, ymin=int95_low, ymax=int95_hi, color=model), alpha=0.50) +
  geom_interval(data=alri5_glm_prov_estimates, 
                aes(x=model, y=irr, ymin=int80_low, ymax=int80_hi, color=model), alpha=0.66) +
  geom_interval(data=alri5_glm_prov_estimates, 
                aes(x=model, y=irr, ymin=int66_low, ymax=int66_hi, color=model), alpha=0.95) +  
  geom_point(data=alri5_glm_prov_estimates, 
             aes(x=model, y=irr), size=2, shape=18, color="white") + 
  
  ggsci::scale_color_lancet() + 
  
  # scale_x_discrete(limits = c("Fixed effects for canton", "Random intercepts for canton")) + 
  
  ggtitle("Province-level analysis") + 
  ylab("Mortality rate ratio (66%, 80%, 95% CI)") + xlab("") + 
  
  geom_hline(yintercept=1) + 
  scale_y_continuous(breaks=c(0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3)) +
  # coord_cartesian(ylim=c(-1, 0)) + 
  theme_bw() + 
  theme(axis.text.y = element_text(size=12, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        axis.title = element_text(size=13, face="bold"),
        plot.title = element_text(size=14, face="bold"),
        legend.position = "top",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"))

prov_glm

# Combine data from nonlinear plots -------

prov_0 <- plot(prov_gam_empty_mod, shade=TRUE, select=1)

prov_estimate_0 <- unlist(prov_0[[1]]$fit) %>% 
  bind_cols(prov_0[[1]]$se) %>% 
  bind_cols(prov_0[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)


prov_1 <- plot(prov_gam_1_mod, shade=TRUE, select=1)

prov_estimate_1 <- unlist(prov_1[[1]]$fit) %>% 
  bind_cols(prov_1[[1]]$se) %>% 
  bind_cols(prov_1[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)

prov_estimates_df <- 
  prov_estimate_0 %>% 
  mutate(model = "Empty model") %>% 
  bind_rows(prov_estimate_1 %>% 
              mutate(model = "Main Model")) 

spline_plot_prov <-
  ggplot(prov_estimates_df, 
         aes(x=cf, y=fit, ymin=fit.low, ymax=fit.high, group=model, color=model)) + 
  geom_ribbon(alpha=0.1, color=NA) + 
  geom_line(size=0.8) + 
  theme_bw() + 
  # coord_cartesian(ylim=c(-.7, 2)) +
  ggsci::scale_fill_lancet() + 
  ggsci::scale_color_lancet() + 
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + 
  ylab("Response of under-5 LRI mortality rate") + 
  ggtitle("Province-level analysis") + 
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title.x=element_blank(),
        legend.position = "top",
        legend.title = element_blank(),
        legend.text = element_text(size=10, family="Helvetica"),
        axis.title.y = element_text(size=11, family = "Helvetica", face="bold")) 


prov_histogram <- ggplot(full_df_prov, aes(x=cf)) +
  geom_histogram(fill="dodgerblue4", alpha=0.8, color="black", bins = 50) + 
  theme_bw() +
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + ylab("Count of provinces") +
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title = element_text(size=13, family = "Helvetica", face="bold"))  


prov_spline_histogram_plot <- cowplot::plot_grid(
  spline_plot_prov,
  prov_histogram,
  align="hv",
  nrow=2,
  rel_heights = c(1, 0.5)
)

prov_spline_histogram_plot


# MAIN ANALYSIS ------

# 6. Model in GAMs --------

# 6.1 k=3 -------

# Empty model -------

alri5_gam_empty_mod <- gam(ALRI5_5_plus ~
                             offset(log(under5population)) + 
                             
                             s(cf,k=3, fx=T) +
                             
                             as.factor(canton_id_universal) +
                             as.factor(period)
                           ,
                           data=full_df,
                           family = quasipoisson())

# Model 1 -------

alri5_gam_1_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       ,
                       data=full_df,
                       family = quasipoisson())

# plot(alri5_gam_1_mod)

# Model 2 --------

alri5_gam_2_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         # Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use+
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())

# Model 3 --------
alri5_gam_3_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         # elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())

# Model 4 --------
alri5_gam_4_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         # elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         # Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())


# Combine data from all plots -------

# MAIN ANALYSIS WITHOUT QUITO + GUAYAQUIL ------

# Empty model -------

alri5_gam_empty_mod_noqg <- gam(ALRI5_5_plus ~
                             offset(log(under5population)) + 
                             
                             s(cf,k=3, fx=T) +
                             
                             as.factor(canton_id_universal) +
                             as.factor(period)
                           ,
                           data=full_df %>% 
                             filter(canton_id_universal!="09_01") %>% 
                             filter(canton_id_universal!="17_01"),
                           family = quasipoisson())

# Model 1 -------

alri5_gam_1_mod_noqg <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       ,
                       data=full_df %>% 
                         filter(canton_id_universal!="09_01") %>% 
                         filter(canton_id_universal!="17_01"),
                       family = quasipoisson())

# plot(alri5_gam_1_mod)

# Model 2 --------

alri5_gam_2_mod_noqg <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         # Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use+
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df %>% 
                         filter(canton_id_universal!="09_01") %>% 
                         filter(canton_id_universal!="17_01"),
                       family = quasipoisson())
# Model 3 --------
alri5_gam_3_mod_noqg <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         # elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df %>% 
                         filter(canton_id_universal!="09_01") %>% 
                         filter(canton_id_universal!="17_01"),
                       family = quasipoisson())

# Model 4 --------
alri5_gam_4_mod_noqg <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + # not w/ outcome
                         electricidad_no + # associated
                         
                         # material_techo_nicer_all + # associated
                         # material_techo_hormigon_losa_cemento_use + # not w/ cf
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + # associated
                         # material_piso_tierra + # associated
                         materials_index + # associated
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + # associated
                         # servicio_higenico_redpublica_use + # associated
                         # elimina_servidas_red_pozo + # associated
                         # elimina_servidas_red_pozo_letrina + # associated
                         # servicio_ducha_excluso + # associated
                         # elimina_basura_service + # not w/ outcome
                         # hygiene_index + # associated
                         
                         # Literate_women + # associated
                         InSchool_girls + # associated
                         Idioma_indigena_self + # associated
                         
                         # BCG_use + # no
                         # DPT3_use + # not w/ cf
                         # SAR_use + # no
                         # Polio_use + # not w/ cf
                         pcv3_use + # not really
                         vaccine_index + # not w/ cf
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df %>% 
                         filter(canton_id_universal!="09_01") %>% 
                         filter(canton_id_universal!="17_01"),
                       family = quasipoisson())


# Combine data from all plots -------

p1_0 <- plot(alri5_gam_empty_mod, shade=TRUE, select=1)

estimate1_0 <- unlist(p1_0[[1]]$fit) %>% 
  bind_cols(p1_0[[1]]$se) %>% 
  bind_cols(p1_0[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)


p1_0_noqg <- plot(alri5_gam_empty_mod_noqg, shade=TRUE, select=1)

estimate1_0_noqg <- unlist(p1_0_noqg[[1]]$fit) %>% 
  bind_cols(p1_0_noqg[[1]]$se) %>% 
  bind_cols(p1_0_noqg[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)


p1_1 <- plot(alri5_gam_1_mod, shade=TRUE, select=1)

estimate1_1 <- unlist(p1_1[[1]]$fit) %>% 
  bind_cols(p1_1[[1]]$se) %>% 
  bind_cols(p1_1[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)


p1_1_noqg <- plot(alri5_gam_1_mod_noqg, shade=TRUE, select=1)

estimate1_1_noqg <- unlist(p1_1_noqg[[1]]$fit) %>% 
  bind_cols(p1_1_noqg[[1]]$se) %>% 
  bind_cols(p1_1_noqg[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)



estimates_df <- 
  estimate1_0 %>% 
  mutate(model = "Empty model") %>% 
  bind_rows(estimate1_0_noqg %>% 
              mutate(model = "Empty model:\nNo Quito or Guayaquil")) %>% 
  bind_rows(estimate1_1 %>% 
              mutate(model = "Main Model")) %>% 
  bind_rows(estimate1_1_noqg %>% 
              mutate(model = "Main Model:\nNo Quito or Guayaquil")) 

spline_plot_noqg <-
  ggplot(estimates_df, 
         aes(x=cf, y=fit, ymin=fit.low, ymax=fit.high, group=model, color=model)) + 
  geom_ribbon(alpha=0.1, color=NA) + 
  geom_line(size=0.8) + 
  theme_bw() + 
  # coord_cartesian(ylim=c(-.7, 2)) +
  ggsci::scale_fill_lancet() + 
  ggsci::scale_color_lancet() + 
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + 
  ylab("Response of under-5 LRI mortality rate") + 
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title.x=element_blank(),
        legend.position = "top",
        legend.title = element_blank(),
        legend.text = element_text(size=10, family="Helvetica"),
        axis.title.y = element_text(size=11, family = "Helvetica", face="bold")) 


histogram <- ggplot(full_df, aes(x=cf)) +
  geom_histogram(fill="dodgerblue4", alpha=0.8, color="black", bins = 50) + 
  theme_bw() +
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + ylab("Count of cantons") +
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title = element_text(size=13, family = "Helvetica", face="bold"))  


preferred_spline_histogram_plot <- cowplot::plot_grid(
  preferred_spline_plot,
  histogram,
  align="hv",
  nrow=2,
  rel_heights = c(1, 0.5)
)

# preferred_spline_histogram_plot



splines_1to4_plot <- ggplot(estimates_df, 
                            aes(x=cf, y=fit, ymin=fit.low, ymax=fit.high, group=model, color=model)) + 
  geom_ribbon(alpha=0.05, fill="dodgerblue3", color=NA) +
  geom_line(size=0.7) +
  ggsci::scale_color_lancet() + 
  theme_bw() + 
  # geom_hline(yintercept = 0, linetype = 2, size=0.3) + 
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + 
  ylab("Response of under-5 LRI mortality rate") + 
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title.x=element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=14, family="Helvetica"),
        axis.title.y = element_text(size=13, family = "Helvetica", face="bold")) 


splines_1to4_histogram_plot <- cowplot::plot_grid(
  splines_1to4_plot,
  histogram,
  align="hv",
  nrow=2,
  rel_heights = c(1, 0.5)
)


splines_1to4_histogram_plot


alternative_specifications_fig_full <- cowplot::plot_grid(
  alternative_specifications_glm,
  splines_1to4_histogram_plot,
  nrow=2
  
)

